import matplotlib.pyplot as plt
import seaborn as sns

# 2. Exploratory Plots
# 3.1 Correlation Matrix
# Every indicator column that enters the correlation heatmap
variables = [
    'MedHHInc',
    'TotalPop',
    'TotalPop16',
    'LabForTotal',
    'Unemployed',
    'PctBach',
    'PovertyRate',
    'UnemploymentRate',
    'LabForParticipationRate',
    'netexport',
    'REALGDP',
    'life_expectancy',
    'Labor_Productivity_2023',
    'REALGDPpercapita',
]

# Narrower subset set aside for later analysis (not used by the heatmap)
selected_variables = [
    'REALGDPpercapita',
    'life_expectancy',
    'MedHHInc',
    'PctBach',
    'UnemploymentRate',
    'LabForParticipationRate',
    'Labor_Productivity_2023',
    'TotalPop',
    'PovertyRate',
    'netexport',
]

# Pairwise correlations between all indicator columns
corr_matrix = us_rescaled_final[variables].corr()

# Annotated heatmap with the colour scale pinned to [-1, 1] so the
# diverging palette is centred on zero correlation
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
    ax=ax,
)
ax.set_title('Correlation Matrix')
plt.show()
# 3.2 Repeated Chart and Bubble Plot
import altair as alt

# Interval selection used as the brush for the scatter-plot matrix
brush = alt.selection_interval()

# Channels shared by every panel: the repeated column/row variable goes on
# x/y, and neither axis is forced to include zero.
repeat_x = alt.X(
    alt.repeat("column"),
    type="quantitative",
    scale=alt.Scale(zero=False),
)
repeat_y = alt.Y(
    alt.repeat("row"),
    type="quantitative",
    scale=alt.Scale(zero=False),
)

# Points selected by the brush are coloured by NAME_x; all others are gray
brushed_color = alt.condition(brush, "NAME_x:N", alt.value("lightgray"))

# Repeated chart: one 200x200 panel for every (row, column) pair of variables.
# Left as a bare expression so a notebook renders it as the cell output.
(
    alt.Chart(us_rescaled_final)
    .mark_circle()
    .encode(
        x=repeat_x,
        y=repeat_y,
        color=brushed_color,
        tooltip=['NAME_x'] + variables,
    )
    .properties(width=200, height=200)
    .add_params(brush)
    .repeat(row=variables, column=variables)
)

# Define dropdown bindings for the x-axis, y-axis and bubble size
# Columns offered by each of the three dropdown menus. Kept in one place so
# the x, y and size menus cannot drift apart.
# NOTE(review): 'REALGDPpercapita' is in `variables` but is not offered here;
# confirm whether that omission is intentional.
bubble_options = [
    'MedHHInc',
    'TotalPop',
    'TotalPop16',
    'LabForTotal',
    'Unemployed',
    'PctBach',
    'PovertyRate',
    'UnemploymentRate',
    'LabForParticipationRate',
    'netexport',
    'REALGDP',
    'life_expectancy',
    'Labor_Productivity_2023',
]

# One select widget per plotted dimension
dropdown_x = alt.binding_select(options=bubble_options, name='X-axis column ')
dropdown_y = alt.binding_select(options=bubble_options, name='Y-axis column ')
dropdown_size = alt.binding_select(options=bubble_options, name='Bubble Size ')

# Parameters holding the currently selected column name for the x axis,
# the y axis and the bubble size; all three start on 'MedHHInc'.
xcol_param = alt.param(value='MedHHInc', bind=dropdown_x)
ycol_param = alt.param(value='MedHHInc', bind=dropdown_y)
size_param = alt.param(value='MedHHInc', bind=dropdown_size)

# Bubble plot. The calculate transform copies whichever column each parameter
# currently names into the fixed fields x / y / size, and the encodings read
# those fixed fields, so changing a dropdown re-targets the chart.
chart2 = (
    alt.Chart(us_rescaled_final)
    .mark_circle()
    .encode(
        x=alt.X('x:Q', scale=alt.Scale(zero=False, domain='unaggregated')).title(''),
        y=alt.Y('y:Q', scale=alt.Scale(zero=False, domain='unaggregated')).title(''),
        size=alt.Size('size:Q', scale=alt.Scale(zero=False, domain='unaggregated')).title(''),
        color='NAME_x:N',
        # Tooltip shows the state name followed by every indicator column
        tooltip=['NAME_x'] + variables,
    )
    .transform_calculate(
        x=f'datum[{xcol_param.name}]',
        y=f'datum[{ycol_param.name}]',
        size=f'datum[{size_param.name}]',
    )
    .add_params(
        xcol_param,
        ycol_param,
        size_param,
    )
    .properties(width=800, height=800)
)

# Bare expression so a notebook renders the chart as the cell output
chart2

# 3.3 Map
!pip install geopandas hvplot panelimport geopandas as gpd
import hvplot.pandas
import panel as pn# Convert from wide to long data
us_rescaled_final_long = pd.melt(us_rescaled_final,
id_vars = ['STATEFP', 'STATENS', 'GEOIDFQ', 'GEOID', 'STUSPS', 'NAME_x', 'LSAD','ALAND', 'AWATER', 'geometry', 'NAME_y', 'GEO_ID'],
value_vars=['MedHHInc', 'EducTotal', 'EducBelowHighSch', 'EducHighSch', 'EducAssoc', 'EducBach', 'TotalPop', 'TotalPop16', 'LabForTotal', 'Unemployed', 'PopPovertyDetermined', 'PovertyPop', 'PctBach', 'PovertyRate', 'UnemploymentRate', 'LabForParticipationRate', 'netexport', 'REALGDP', 'life_expectancy', 'Labor_Productivity_2023', 'REALGDPpercapita']
)chart3 = us_rescaled_final_long.hvplot(
c="value",
dynamic=False,
width=1000,
height=1000,
geo=True,
cmap="viridis",
groupby="variable")chart3